#1. Load Required Packages
required_packages <- c("readxl","PMCMRplus","dplyr")
for (package in required_packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}
#Description: This section ensures the required packages (readxl, PMCMRplus, dplyr) are installed and loaded.


#2. Import Data Frame
# Load the cohort sheet unless a data frame named `df` is already available.
# A bare exists("df") is always TRUE because the stats package provides a
# function called df(), so the import would never run in a fresh session.
# Restricting the lookup to mode "list" skips that function; get0() returns
# NULL when nothing suitable is found.
if (!is.data.frame(get0("df", mode = "list"))) {
  file_path <- "~/COHORT_Prediction_of_CIP.xlsx"
  # Cells containing the text "#N/A" are read as missing values.
  df <- read_excel(file_path, sheet = "cohort", na = "#N/A")
}
#Description: This section imports data from an Excel file if the data frame df does not already exist.


#3. Filter Data
# Columns carried into the analysis: the four SUV statistics for each of the
# four lung regions, followed by the remaining covariates.
suv_columns <- paste0(
  rep(c("r_upper", "r_lower", "l_upper", "l_lower"), each = 4),
  "_suv_",
  c("mean", "min", "max", "sd")
)
covariate_columns <- c(
  "side_r0_l1", "pneumonitis", "emphysema", "copd", "pet_device",
  "pet_location", "pet_distance_ici", "pet_impossible_spheres_placement",
  "sex_male", "bmi", "weight", "size", "radio_after_pet", "pet_glucose",
  "pet_dose", "pet_acquisition"
)
df_filtered <- df[, c(suv_columns, covariate_columns)]

# Upper bound of the distance window; rows with a missing distance pass here.
df_filtered_distance <- subset(
  df_filtered,
  is.na(pet_distance_ici) | pet_distance_ici <= 365
)
# Lower bound of the distance window.
# NOTE(review): subset() drops rows whose condition evaluates to NA, so rows
# with a missing distance are removed by this step even though the previous
# step kept them - confirm which behaviour is intended.
df_filtered_distance <- subset(df_filtered_distance, pet_distance_ici >= -1)

# Keep only rows where pet_impossible_spheres_placement equals 0.
df_filtered <- subset(
  df_filtered_distance,
  pet_impossible_spheres_placement == 0
)

# Coerce every column to numeric in place; coercion warnings are silenced.
# The filters above run before this conversion.
df_filtered[] <- suppressWarnings(lapply(df_filtered, as.numeric))
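#Description: This section selects the analysis columns, keeps only records where the distance between PET/CT and immunotherapy is between -1 and 365 days and pet_impossible_spheres_placement equals 0, and converts all remaining columns to numeric.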


#4. Add Columns for Calculations
# Names of the derived columns, in the order they are appended to df_filtered.
sul_prefixes <- c("sul_lung", "upper_sul", "lower_sul", "no_tumor_sul")
cols <- c(
  paste0("no_tumor_suv_", c("mean", "min", "max", "sd")),
  "suv_95", "no_tumor_suv_95", "upper_suv_95", "lower_suv_95",
  paste0(sul_prefixes, "_mean"),
  paste0(sul_prefixes, "_max"),
  "LBM"
)

# Create the columns as placeholders filled with NA; they are populated later.
df_filtered[, cols] <- NA
# Description: This section adds new columns to the filtered data frame for various calculated metrics.


#5. Define Functions for Calculations
# Calculate Lean Body Mass (LBM) from weight, BMI and sex.
#
# Arguments:
#   bmi     body mass index
#   size    body height; kept for interface compatibility, the formula below
#           does not use it
#   weight  body weight
#   sex     1 = male, 0 = female
#
# Returns the LBM, or NA_real_ when `sex` is missing or is neither 0 nor 1.
calculate_LBM <- function(bmi, size, weight, sex) {
  # A missing sex code would make the comparisons below evaluate `if (NA)`,
  # which is an error; report a missing LBM instead so callers looping over
  # rows are not aborted by a single incomplete record.
  if (is.na(sex)) {
    return(NA_real_)
  }
  if (sex == 1) {
    # male
    (9.27 * 10^3 * weight) / ((6.68 * 10^3) + (216 * bmi))
  } else if (sex == 0) {
    # female
    (9.27 * 10^3 * weight) / ((8.78 * 10^3) + (244 * bmi))
  } else {
    # Unknown code: no formula applies.
    NA_real_
  }
}

# Convert a single SUV (mean or max) into its SUL counterpart: the SUV is
# divided by `weight` and multiplied by `lbm`.
# A missing `suv` short-circuits to NA.
calculate_SUL <- function(suv, lbm, weight) {
  if (is.na(suv)) {
    return(NA)
  }
  (suv / weight) * lbm
}
# Description: This section defines functions to calculate Lean Body Mass (LBM) and SUL, the Standardized Uptake Value normalized to lean body mass.


#6. Calculate SUV and Related Metrics
# Fill in missing lung sides and derive the no_tumor_* SUV statistics.
suv_stats <- c("mean", "min", "max", "sd")

# Step 1: for each region (upper / lower), where one side has no mean SUV but
# the other side does, copy all four statistics over from the measured side.
# The masks are computed from the means before any value is overwritten and
# are mutually exclusive.
for (region in c("upper", "lower")) {
  left_mean <- df_filtered[[paste0("l_", region, "_suv_mean")]]
  right_mean <- df_filtered[[paste0("r_", region, "_suv_mean")]]
  fill_left <- is.na(left_mean) & !is.na(right_mean)
  fill_right <- is.na(right_mean) & !is.na(left_mean)

  for (stat in suv_stats) {
    left_col <- paste0("l_", region, "_suv_", stat)
    right_col <- paste0("r_", region, "_suv_", stat)
    left_values <- df_filtered[[left_col]]
    right_values <- df_filtered[[right_col]]
    df_filtered[[left_col]][fill_left] <- right_values[fill_left]
    df_filtered[[right_col]][fill_right] <- left_values[fill_right]
  }
}

# Step 2: no_tumor_* values are the average of the lower and upper region of
# one lung: the left lung when side_r0_l1 == 1, the right lung when it is 0,
# and only when that lung's lower mean SUV is available.
# NOTE(review): confirm that side_r0_l1 encodes the side to be used here
# (i.e. the tumor-free side) and not the tumor side.
side <- df_filtered$side_r0_l1
use_left <- !is.na(side) & side == 1 & !is.na(df_filtered$l_lower_suv_mean)
use_right <- !is.na(side) & side == 0 & !is.na(df_filtered$r_lower_suv_mean)

for (stat in suv_stats) {
  # The two values are combined into one matrix row before averaging.
  # Calling mean(lower, upper, na.rm = TRUE) would pass the upper value as the
  # `trim` argument and return the lower value alone.
  left_avg <- rowMeans(
    cbind(
      df_filtered[[paste0("l_lower_suv_", stat)]],
      df_filtered[[paste0("l_upper_suv_", stat)]]
    ),
    na.rm = TRUE
  )
  right_avg <- rowMeans(
    cbind(
      df_filtered[[paste0("r_lower_suv_", stat)]],
      df_filtered[[paste0("r_upper_suv_", stat)]]
    ),
    na.rm = TRUE
  )
  target <- paste0("no_tumor_suv_", stat)
  df_filtered[[target]][use_left] <- left_avg[use_left]
  df_filtered[[target]][use_right] <- right_avg[use_right]
}

# Average the SUV statistics in two stages; missing inputs are ignored
# (na.rm = TRUE), so a row with both inputs missing yields NaN.
stat_names <- c("mean", "max", "min", "sd")

# Stage 1: right and left side -> one value per region (upper_*, lower_*).
for (region in c("upper", "lower")) {
  for (stat in stat_names) {
    side_cols <- paste0(c("r_", "l_"), region, "_suv_", stat)
    df_filtered[[paste0(region, "_suv_", stat)]] <-
      rowMeans(df_filtered[, side_cols], na.rm = TRUE)
  }
}

# Stage 2: upper and lower region -> whole-lung value (suv_lung_*).
for (stat in stat_names) {
  region_cols <- paste0(c("upper", "lower"), "_suv_", stat)
  df_filtered[[paste0("suv_lung_", stat)]] <-
    rowMeans(df_filtered[, region_cols], na.rm = TRUE)
}

# Derive SUV95 (mean + z * sd with z = qnorm(0.95)), lean body mass and the
# SUL variants of the mean / max SUVs.
z_95 <- qnorm(0.95)

df_filtered$suv_95 <- df_filtered$suv_lung_mean +
  (z_95 * df_filtered$suv_lung_sd)

# NOTE(review): upper_suv_95 and lower_suv_95 are only filled for rows that
# have a no_tumor_suv_mean, although they do not depend on it. This mirrors
# the existing behaviour - confirm whether the restriction is intended.
has_no_tumor <- !is.na(df_filtered$no_tumor_suv_mean)
no_tumor_95 <- df_filtered$no_tumor_suv_mean +
  (z_95 * df_filtered$no_tumor_suv_sd)
upper_95 <- df_filtered$upper_suv_mean + (z_95 * df_filtered$upper_suv_sd)
lower_95 <- df_filtered$lower_suv_mean + (z_95 * df_filtered$lower_suv_sd)
df_filtered$no_tumor_suv_95[has_no_tumor] <- no_tumor_95[has_no_tumor]
df_filtered$upper_suv_95[has_no_tumor] <- upper_95[has_no_tumor]
df_filtered$lower_suv_95[has_no_tumor] <- lower_95[has_no_tumor]

# Target SUL column -> source SUV column.
sul_sources <- c(
  sul_lung_mean = "suv_lung_mean",
  sul_lung_max = "suv_lung_max",
  upper_sul_mean = "upper_suv_mean",
  upper_sul_max = "upper_suv_max",
  lower_sul_mean = "lower_suv_mean",
  lower_sul_max = "lower_suv_max",
  no_tumor_sul_mean = "no_tumor_suv_mean",
  no_tumor_sul_max = "no_tumor_suv_max"
)

# calculate_LBM() and calculate_SUL() work on single values, so iterate over
# rows; seq_len() keeps the loop from running when there are no rows.
for (i in seq_len(nrow(df_filtered))) {
  weight_i <- df_filtered$weight[i]
  lbm_i <- calculate_LBM(
    df_filtered$bmi[i], df_filtered$size[i], weight_i, df_filtered$sex_male[i]
  )
  df_filtered$LBM[i] <- lbm_i

  for (sul_col in names(sul_sources)) {
    suv_col <- sul_sources[[sul_col]]
    # The third argument of calculate_SUL() is the body weight; the body
    # height (`size`) must not be passed here.
    df_filtered[[sul_col]][i] <-
      calculate_SUL(df_filtered[[suv_col]][i], lbm_i, weight_i)
  }
}
# Description: This section fills in missing SUV values and calculates various metrics based on available data


#7. Set Variables for Loop Through Variables and Run Tests
# Outcome variables, grouped in blocks of four
# (whole lung, upper lung, lower lung, no-tumor lung).
variables <- c(
  "suv_lung_mean", "upper_suv_mean", "lower_suv_mean", "no_tumor_suv_mean",
  "suv_lung_max", "upper_suv_max", "lower_suv_max", "no_tumor_suv_max",
  "suv_95", "upper_suv_95", "lower_suv_95", "no_tumor_suv_95",
  "sul_lung_mean", "upper_sul_mean", "lower_sul_mean", "no_tumor_sul_mean",
  "sul_lung_max", "upper_sul_max", "lower_sul_max", "no_tumor_sul_max"
)

# One Kruskal-Wallis test per outcome against pet_device; each test yields a
# one-row data frame holding the variable name and the p-value rounded to
# three decimals.
kruskal_rows <- lapply(variables, function(outcome) {
  kw_fit <- kruskal.test(
    as.formula(paste(outcome, "~ pet_device")),
    data = df_filtered
  )
  data.frame(Variable = outcome, p_value = round(kw_fit$p.value, 3))
})

# Stack the rows onto an empty, correctly typed results frame.
empty_results <- data.frame(Variable = character(0), p_value = numeric(0))
results <- do.call(rbind, c(list(empty_results), kruskal_rows))
#Description: This code initializes a list of variables and performs Kruskal-Wallis tests for each variable to compare their distributions across different levels of the PET scanner. The results, including p-values, are collected in a data frame.


#8. Function to Insert Rows in Results Data Frame
# Insert a header row into the global `results` data frame.
#
# Arguments:
#   variables  text placed in the Variable column of the new row
#   row        number of existing rows that stay in front of the new row
#              (0 puts the new row first, nrow(results) appends it)
#
# The new row has empty p_value and Significance cells, so `results` must
# already have a Significance column. `results` is updated in the enclosing
# environment via `<<-`; the updated data frame is also returned invisibly.
insert_row <- function(variables, row) {
  new_row <- data.frame(Variable = variables, p_value = "", Significance = "")
  n_rows <- nrow(results)
  leading <- results[seq_len(min(row, n_rows)), ]
  # (row + 1):n_rows would count backwards when row == n_rows and pull in an
  # NA row plus a duplicate of the last row, so use an empty slice instead.
  trailing <- if (row < n_rows) {
    results[(row + 1):n_rows, ]
  } else {
    results[0, ]
  }
  results <<- rbind(leading, new_row, trailing)
  invisible(results)
}
#Description: Defines a function insert_row that adds a new row into the results data frame at a specified index. This function is used to introduce new categories or groupings into the results table.


#9. Add Significance Annotations to Results Data Frame
# Translate the stored p-values into star annotations:
#   p < 0.001 -> "***", p < 0.01 -> "**", p < 0.05 -> "*", otherwise "".
# The thresholds are applied to the values as stored in results$p_value.
# Working on the whole column avoids the scalar `if`, which fails on a
# missing p-value; missing p-values get an empty annotation instead.
p_values <- results$p_value
is_below <- function(threshold) !is.na(p_values) & p_values < threshold

significance <- rep("", nrow(results))
# Apply the widest threshold first so stricter ones overwrite it.
significance[is_below(0.05)] <- "*"
significance[is_below(0.01)] <- "**"
significance[is_below(0.001)] <- "***"
results$Significance <- significance
#Description: This block iterates through the rows of the results data frame, assigning significance stars based on p-value thresholds. This helps in quickly identifying statistically significant results.


#10. Apply Bonferroni Correction
# Bonferroni threshold: the 0.05 level divided by the number of tested
# variables. The value is only reported here.
variable_count <- length(variables)
variable_count
adjusted_alpha <- 0.05 / variable_count
bonferroni_note <- paste(
  "adjusted alpha after Bonferroni correction: p-value <",
  round(adjusted_alpha, 4)
)
print(bonferroni_note)
#Description: Calculates the Bonferroni-adjusted alpha level for multiple comparisons. This adjustment is necessary to control for Type I errors due to multiple testing.


#11. Insert Rows and Rename Variables
# Put a header row in front of each block of four variables. Each position is
# the number of rows already in front of the header at the time it is
# inserted, so earlier insertions are accounted for.
group_headers <- c("SUVMEAN", "SUVMAX", "SUV95", "SULMEAN", "SULMAX")
header_positions <- c(0, 5, 10, 15, 20)
for (k in seq_along(group_headers)) {
  insert_row(group_headers[k], header_positions[k])
}
rownames(results) <- seq_len(nrow(results))

# Replace the technical variable names by display labels.
label_groups <- list(
  "whole lung" = c(
    "suv_lung_mean", "suv_lung_max", "suv_95",
    "sul_lung_mean", "sul_lung_max"
  ),
  "upper lung" = c(
    "upper_suv_mean", "upper_suv_max", "upper_suv_95",
    "upper_sul_mean", "upper_sul_max"
  ),
  "lower lung" = c(
    "lower_suv_mean", "lower_suv_max", "lower_suv_95",
    "lower_sul_mean", "lower_sul_max"
  ),
  "TFL" = c(
    "no_tumor_suv_mean", "no_tumor_suv_max", "no_tumor_suv_95",
    "no_tumor_sul_mean", "no_tumor_sul_max"
  )
)
for (label in names(label_groups)) {
  # Every cell of the table is checked, not only the Variable column.
  results[] <- lapply(results, function(column) {
    column[column %in% label_groups[[label]]] <- label
    column
  })
}
#Description: Inserts rows into the results data frame to group the variables into meaningful categories and updates the variable names for better readability and interpretation.


#12. Print Additional Notes
# Print a summary sentence built from the p-values, then the table itself.
# Row indices refer to `results` after the five header rows were inserted:
#   2-5 SUVMEAN, 7-10 SUVMAX, 12-15 SUV95, 17-20 SULMEAN, 22-25 SULMAX,
#   each block ordered whole lung, upper lung, lower lung, TFL.
# NOTE(review): which comparisons are called significant is hard-coded in the
# sentence; re-check the wording whenever the data change.
# NOTE(review): df_filtered_subgroup is not created in this part of the
# script; it has to exist already when this statement runs.
p_at <- function(rows) as.numeric(results$p_value[rows])
p_range <- function(rows) {
  paste(round(min(p_at(rows)), 3), "–", round(max(p_at(rows)), 3))
}

# Whole lung, lower lung and TFL - the SUVMAX rows named in the sentence. The
# same rows are used for both ends of the range; the lower end previously also
# included the upper-lung row (8).
suvmax_rows <- c(7, 9:10)

print(paste(
  "While there was no significant difference between SUVMEAN values (p =",
  p_range(2:5),
  "), SUV95 (",
  p_range(12:15),
  "), and SULMEAN (",
  p_range(17:20),
  ") across all devices, there were significant differences of SUVMAX of the whole lung, lower lung and the tumor free lung (p =",
  p_range(suvmax_rows),
  "), and SULMAX of the tumor free lung (p =",
  round(min(p_at(25)), 3),
  "). For further analyses with SUVMAX and SULMAX, the subgroup of",
  nrow(df_filtered_subgroup),
  "patients scanned with the same PET/CT device was used."
))

print(results)
#Description: Prints a summary of the results, including the number of recordings evaluated and the key findings with significance annotations.


#13. Save Results
# Write the results table to a CSV file (without row names) and confirm on
# the console.
output_file <- "Table S1.csv"
write.csv(results, file = output_file, row.names = FALSE)
print(paste0("saved:", output_file))
#Saves the results to a CSV file. 

